library(Seurat)
## Warning: package 'Seurat' was built under R version 4.0.4
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.4
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.4
library(plotly)
## Warning: package 'plotly' was built under R version 4.0.4
library(cowplot)
library(SeuratWrappers)
library(monocle3)
## Warning: package 'matrixStats' was built under R version 4.0.4
source("R:/People Folders/Connor/R_functions/Find_nCount_minimum.R")
source("R:/People Folders/Connor/R_functions/UMAP_params.R")
D59 = Read10X("R:/Genomics/RNA-seq/Human\ fetal\ SC\ #1\ RNAseq\ Dev/59d_SI-3A-E3/outs/filtered_gene_bc_matrices/GRCh38")
D59 = CreateSeuratObject(D59)
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
D59$time_point = "D59"
D59 = subset(D59, subset = nCount_RNA < quantile(D59$nCount_RNA, probs = .95)["95%"])
find_cutoff(D59, min_cutoff = 20, dist = 200)
## Warning in find_cutoff(D59, min_cutoff = 20, dist = 200): Setting min_cutoff overrides min_cutoff_pct.
## All values with less than 20 will not be considered. This cutoff removes 2.287166 % of cells.

## [1] 2100 3100 3900
D59 = subset(D59, subset = nCount_RNA > 1000,)
hist(D59$nCount_RNA)

D59 = NormalizeData(D59)
D59 = FindVariableFeatures(D59)
D59 = ScaleData(D59, features = rownames(D59))
D59 = RunPCA(D59, npcs = 100)
ElbowPlot(D59, ndims = 100)

D59 = RunUMAP(D59, dims = 1:30)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
D59 = FindNeighbors(D59, dims = 1:30)
D59 = FindClusters(D59, resolution = 2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3168
## Number of edges: 143418
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7245
## Number of communities: 16
## Elapsed time: 0 seconds
D59 = FindClusters(D59, resolution = 1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3168
## Number of edges: 143418
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8099
## Number of communities: 13
## Elapsed time: 0 seconds
DimPlot(D59)

ggplotly(DimPlot(D59))
UMAP_test_square(D59,
min_dist_range = seq(.1, .7, length.out = 5),
n.neighbors_range = c(5, 50, 500),
pcs_range = 1:20)
## [1] "Starting entry 1 in row 1"
## [1] "finished entry 1 in row 1"
## [1] "Starting entry 2 in row 1"
## [1] "finished entry 2 in row 1"
## [1] "Starting entry 3 in row 1"
## [1] "finished entry 3 in row 1"
## [1] "finished row 1"
## [1] "Starting entry 1 in row 2"
## [1] "finished entry 1 in row 2"
## [1] "Starting entry 2 in row 2"
## [1] "finished entry 2 in row 2"
## [1] "Starting entry 3 in row 2"
## [1] "finished entry 3 in row 2"
## [1] "finished row 2"
## [1] "Starting entry 1 in row 3"
## [1] "finished entry 1 in row 3"
## [1] "Starting entry 2 in row 3"
## [1] "finished entry 2 in row 3"
## [1] "Starting entry 3 in row 3"
## [1] "finished entry 3 in row 3"
## [1] "finished row 3"
## [1] "Starting entry 1 in row 4"
## [1] "finished entry 1 in row 4"
## [1] "Starting entry 2 in row 4"
## [1] "finished entry 2 in row 4"
## [1] "Starting entry 3 in row 4"
## [1] "finished entry 3 in row 4"
## [1] "finished row 4"
## [1] "Starting entry 1 in row 5"
## [1] "finished entry 1 in row 5"
## [1] "Starting entry 2 in row 5"
## [1] "finished entry 2 in row 5"
## [1] "Starting entry 3 in row 5"
## [1] "finished entry 3 in row 5"
## [1] "finished row 5"

D59_3d = RunUMAP(D59, dims = 1:30, n.components = 3)
D59_3d_frame = D59_3d@meta.data
D59_3d_frame = cbind(D59_3d_frame, D59_3d@reductions$umap@cell.embeddings)
plot_ly(D59_3d_frame, x = ~UMAP_1, y = ~UMAP_2, z = ~UMAP_3, color = ~seurat_clusters, size = 1)
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
FeaturePlot(D59, c("ATOH7", "CRX", "GAP43", "VSX1", "POU4F3", "TFAP2A"), order = T)

FeaturePlot(D59, c("ATOH7", "CRX", "PRDM1", "AIF1", "POU4F2", "CCND1"), order = T)

FeaturePlot(D59, c("ATOH7", "CRX", "PRDM1", "TNF", "GFAP", "CCND1"), order = T)

D59$type = recode(D59$RNA_snn_res.1,
`0` = "RGC", #GAP43
`1` = "RGC", #GAP43
`2` = "RGC", #GAP43
`3` = "Progen", #location CCND1
`4` = "Progen", #location CCND1
`5` = "AC", # TFAP2A
`6` = "RGC_transition", #Pou
`7` = "T0", #maybe T3 also
`8` = "AC_transition", #TFAP2A
`9` = "Progen", #location CCND1
`10` = "RGC", #GAP43
`11` = "Progen", #location CCND1
`12` = "Astrocyte?"
)
D59$type = as.character(D59$type)
D59@meta.data[D59$RNA_snn_res.2 == 14, "type"] = "Microglia" #they did not want to split from the progenitors
ggplotly(DimPlot(D59, group.by = "type"))
A lot of monocle is interactive so I am just loading in the cds I made already
# D59.cds <- as.cell_data_set(D59)
# D59.cds <- cluster_cells(cds = D59.cds, reduction_method = "UMAP")
# D59.cds <- learn_graph(D59.cds, use_partition = TRUE, learn_graph_control = list(prune_graph = F, minimal_branch_len = 5))
#
# D59.cds <- order_cells(D59.cds)
D59.cds = readRDS("D59_fetal_cds.RDS")
plot_cells(D59.cds, color_cells_by = "pseudotime")
## Warning: `select_()` was deprecated in dplyr 0.7.0.
## Please use `select()` instead.

# saveRDS(D59.cds, "D59_fetal_cds.RDS")
# D59.RGC <- choose_graph_segments(D59.cds)
# rgc_cells = colnames(D59.RGC)
#
# rm(D59.RGC)
# D59.AC <- choose_graph_segments(D59.cds)
# AC_cells = colnames(D59.AC)
#
# rm(D59.AC)
# D59.Photo <- choose_graph_segments(D59.cds)
# Photo_cells = colnames(D59.Photo)
#
# rm(D59.Photo)
since all this comes from interactive input above I am also going to load in the old object
# D59$branch = "not assigned"
#
# D59$pseudotime = pseudotime(D59.cds)
#
# D59@meta.data[rgc_cells, "branch"] = "RGC"
#
# D59$RGC_branch = 0
# D59@meta.data[rgc_cells, "RGC_branch"] = 1
# D59@meta.data[rgc_cells, "branch"] = "RGC"
#
# D59$AC_branch = 0
# D59@meta.data[AC_cells, "AC_branch"] = 1
# D59@meta.data[AC_cells, "branch"] = "AC"
#
# D59$Photo_branch = 0
# D59@meta.data[Photo_cells, "Photo_branch"] = 1
# D59@meta.data[Photo_cells, "branch"] = "Photo"
#
#
# D59@meta.data[intersect(Photo_cells, AC_cells), "branch"] = "Photo/AC"
# D59@meta.data[intersect(intersect(Photo_cells, AC_cells), rgc_cells), "branch"] = "T0"
# D59@meta.data[D59$type == "Progen", "branch"] = "Progen"
# D59@meta.data[D59$type == "Microglia", "branch"] = "not assigned"
D59 = readRDS("D59_branches.RDS")
# I don't like that the microglia aren't splitting out, I want to fix that
DimPlot(D59, group.by = "branch")

# saveRDS(D59, "D59_branches.RDS")
#I made this a while ago
tf_dict = read.csv(file = "R:/Genomics/scATAC-seq/Marcus and Connor/Useful data tables/motif_gene_name_datatable.csv")
d59_genes = Matrix::rowSums(D59@assays$RNA@data > 0)
d59_genes = d59_genes[d59_genes > 50]
motifs_in_D53 = intersect(tf_dict$HGNC.symbol, names(d59_genes))
length(motifs_in_D53)
## [1] 262
length(tf_dict$HGNC.symbol)
## [1] 805
tf_D59 = tf_dict[tf_dict$HGNC.symbol %in% motifs_in_D53,]
length(unique(tf_D59$motif_names))
## [1] 360
D59_meta_RGC = D59@meta.data
D59_meta_RGC = D59_meta_RGC[D59_meta_RGC$RGC_branch == 1,]
D59_meta_RGC = D59_meta_RGC[order(D59_meta_RGC$pseudotime),]
tf_mtx = D59@assays$RNA@scale.data
tf_mtx = tf_mtx[unique(motifs_in_D53),rownames(D59_meta_RGC)]
pheatmap::pheatmap(tf_mtx,
show_colnames = F,
annotation_col = D59_meta_RGC[c("type", "pseudotime")],
breaks = seq(-2, 2, length.out = 100),
cluster_cols = F)
